home *** CD-ROM | disk | FTP | other *** search
- # SpamAssassin rules file: URI tests
- #
- # Please don't modify this file as your changes will be overwritten with
- # the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
- # See 'perldoc Mail::SpamAssassin::Conf' for details.
- #
- # <@LICENSE>
- # Copyright 2004 Apache Software Foundation
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # </@LICENSE>
- #
- ###########################################################################
-
- # @@VERSION@@ is a template placeholder in the same @@...@@ form as
- # @@LOCAL_RULES_DIR@@ in the header; presumably it is replaced with the
- # real release number when the rules are built/installed (TODO confirm).
- require_version @@VERSION@@
-
- # http:// or https:// followed immediately by a run of seven or more digits,
- # i.e. a host written as one large number rather than a name or a dotted
- # quad. Dotted-decimal hosts are handled by NORMAL_HTTP_TO_IP instead.
- uri NUMERIC_HTTP_ADDR /^https?\:\/\/\d{7,}/is
- describe NUMERIC_HTTP_ADDR Uses a numeric IP address in URL
-
- # http(s) URI whose host begins with four dot-separated digit groups,
- # optionally preceded by a "user@" part. The groups are plain \d+ and are
- # not range-checked, and the pattern is not anchored after the fourth
- # group, so anything may follow it.
- uri NORMAL_HTTP_TO_IP /^https?\:\/\/(?:\S*\@)?\d+\.\d+\.\d+\.\d+/i
- describe NORMAL_HTTP_TO_IP Uses a dotted-decimal IP address in URL
-
- # Theo sez:
- # Have gotten FPs off this, and whitespace can't be in the host, so...
- # % Visit my homepage: http://i.like.foo.com %
- #
- # [^\/\s]* keeps the match before the first '/' or whitespace, so the
- # %XX escape (two hex digits) must occur in the part of the URI that
- # precedes the path.
- # NOTE(review): unlike most neighbouring rules this one has no /i flag, so
- # the scheme must be lowercase as written; confirm that is intentional.
- uri HTTP_ESCAPED_HOST /^https?\:\/\/[^\/\s]*%[0-9a-fA-F][0-9a-fA-F]/
- describe HTTP_ESCAPED_HOST Uses %-escapes inside a URL's hostname
-
- # note: do not match \r or \n
- # The character class covers \x00-\x1f except \x09 (tab), \x0a (\n) and
- # \x0d (\r). As with HTTP_ESCAPED_HOST, [^\/\s]* limits the match to the
- # part of the URI before the first '/' or whitespace.
- uri HTTP_CTRL_CHARS_HOST /^https?\:\/\/[^\/\s]*[\x00-\x08\x0b\x0c\x0e-\x1f]/
- describe HTTP_CTRL_CHARS_HOST Uses control sequences inside a URL hostname
-
- # look for URI with escaped 0-9, A-Z, or a-z characters (all other safe
- # characters have been well-tested, but are sometimes unnecessarily escaped
- # in nonspam; requiring "http" or "https" also reduces false positives).
- #
- # How the alternation maps onto those characters (hex digits match in
- # either case because of /i):
- #   %3\d            -> %30-%39         = 0-9
- #   %[46][1-9a-f]   -> %41-%4f, %61-%6f = A-O, a-o
- #   %[57][\da]      -> %50-%5a, %70-%7a = P-Z, p-z
- # The escape may appear anywhere in the URI (\S*), not only in the host.
- uri HTTP_EXCESSIVE_ESCAPES /^https?:\/\/\S*%(?:3\d|[46][1-9a-f]|[57][\da])/i
- describe HTTP_EXCESSIVE_ESCAPES Completely unnecessary %-escapes inside a URL
-
- # bug 1801
- # Same dotted-decimal host prefix as NORMAL_HTTP_TO_IP (optional "user@",
- # then four digit groups), followed within at most 20 characters by one of
- # "cgi", "click", "ads" or "id=". The keywords are matched as plain
- # substrings, case-insensitively.
- uri IP_LINK_PLUS /^https?\:\/\/(?:\S*\@)?\d+\.\d+\.\d+\.\d+.{0,20}(?:cgi|click|ads|id\=)/i
- describe IP_LINK_PLUS Dotted-decimal IP address followed by CGI
-
- # http(s) URI with the literal "remove" anywhere after the first '/' that
- # follows the host, so it matches in a directory name or query string as
- # well as in a page name. There is no /i flag, so only lowercase "remove"
- # (and a lowercase scheme) match.
- # NOTE(review): MAILTO_TO_REMOVE uses /is; confirm the case-sensitivity
- # here is deliberate.
- uri REMOVE_PAGE /^https?:\/\/[^\/]+\/.*?remove/
- describe REMOVE_PAGE URL of page called "remove"
-
- # mailto: URI whose local part is one or more letters followed by two or
- # more digits and then '@' (e.g. "mailto:abc123@..."). Letters match in
- # either case because of /i.
- uri MAILTO_TO_SPAM_ADDR /^mailto:[a-z]+\d{2,}\@/is
- describe MAILTO_TO_SPAM_ADDR Includes a link to a likely spammer email
-
- # mailto: URI containing "remove" (any case) anywhere after the scheme.
- # The pattern does not stop at '@' or '?', so the word may be in the
- # address or in anything that follows it; /s lets '.' cross newlines.
- uri MAILTO_TO_REMOVE /^mailto:.*?remove/is
- describe MAILTO_TO_REMOVE Includes a 'remove' email address
-
- # allow ports 80 and 443, which are http and https respectively, and also
- # 8080 (the third lookbehind below exempts it).
- # we don't want to hit http://www.cnn.com:USArticle1840@www.liquidshirts.com/
- # though, which actually doesn't have a weird port in it.
- #
- # How the pattern works:
- #   :\d+                a colon followed directly by digits, before any '/'
- #   (?<!:80) etc.       negative lookbehinds that reject the match when the
- #                       text just consumed ends in ":80", ":443" or ":8080"
- #   (?:/|\s|$)          the digits must be followed by '/', whitespace or
- #                       end of string, so "user:1234@host" is not a port
- # The pattern is not anchored with ^, so it can match an http(s) URL that
- # appears later in the URI string.
- uri WEIRD_PORT m{https?://[^/\s]+?:\d+(?<!:80)(?<!:443)(?<!:8080)(?:/|\s|$)}
- describe WEIRD_PORT Uses non-standard port number for HTTP
-
- # looks for a (maybe empty) username and (optional) password in an url
- # [^/\s]*? is the username (zero or more characters), (?::[^/\s]+?)? is the
- # optional ":password", and the '@' must appear before the first '/' or
- # whitespace after the scheme.
- uri USERPASS m{^https?://[^/\s]*?(?::[^/\s]+?)?\@}
- describe USERPASS URL contains username and (optional) password
-
- # Matches any URI whose last character is '#'. The pattern only checks the
- # end of the string, so it also hits longer URIs that merely end in '#',
- # not just a bare "#".
- # The '#' is backslash-escaped in both lines, presumably so the config
- # parser does not treat it as the start of a comment (TODO confirm against
- # Mail::SpamAssassin::Conf).
- uri URI_IS_POUND m{\#$}
- describe URI_IS_POUND Filename is just a '\#'; probably a JS trick
-
- # "bargain", optionally followed by "s", "z" or a hyphen plus non-whitespace
- # characters, then ".com" or ".biz". Unanchored and case-sensitive (no /i),
- # so it matches anywhere in the URI but only in lowercase. URI_OFFERS uses
- # the same shape for "offer".
- uri BARGAIN_URL /bargain([sz]|-\S+)?\.(?:com|biz)/
- describe BARGAIN_URL Includes a link to a likely spammer domain
-
- # this is somewhat loose, but results are good
- # "Loose" because the pattern is not tied to the host part: ".biz" followed
- # by '/' or end of string matches anywhere in the URI, including inside a
- # path. Compare INFO_TLD, which anchors on the scheme and host.
- uri BIZ_TLD /\.biz(?:\/|$)/i
- describe BIZ_TLD Contains an URL in the BIZ top-level domain
-
- # http(s) or mailto URI where the text between the scheme and the first '/'
- # (or end of string) ends in ".info". [^\/]+ cannot cross a '/', which
- # keeps the match out of the path.
- uri INFO_TLD /^(?:https?:\/\/|mailto:)[^\/]+\.info(?:\/|$)/i
- describe INFO_TLD Contains an URL in the INFO top-level domain
-
- # Matt Cline
- # Pretty good for most folks, except for jm: I have a really stupid
- # e-commerce bunch obfuscating their URLs with this for some reason. screw 'em
- # jm: hesitant to remove this outright; it should be good against phishers
- #uri HTTP_ENTITIES_HOST m{https?://[^\s\">/]*\&\#[\da-f]+}i
- #describe HTTP_ENTITIES_HOST URI obscured with character entities
-
- # http(s)://rd.yahoo.com/ where the path starts with either four or more
- # digits, or the whole word "partner" or "dir" (\b requires a word boundary
- # after it). Other rd.yahoo.com paths are not matched.
- uri YAHOO_RD_REDIR m{^https?\://rd\.yahoo\.com/(?:[0-9]{4,}|partner\b|dir\b)}i
- describe YAHOO_RD_REDIR Has Yahoo Redirect URI
-
- # Any http(s) URI on host drs.yahoo.com; the trailing '/' after the host is
- # required, but the path itself is not inspected.
- uri YAHOO_DRS_REDIR m{^https?://drs\.yahoo\.com/}i
- describe YAHOO_DRS_REDIR Has Yahoo Redirect URI
-
- # "offer", optionally followed by "s", "z" or a hyphen plus non-whitespace
- # characters, then ".com", ".biz" or ".bz" (the 'i' in bi?z is optional).
- # Unanchored and case-insensitive.
- uri URI_OFFERS m/offer([sz]|-\S+)?\.(?:com|bi?z)/i
- describe URI_OFFERS Message has link to company offers
-
- # http(s) or mailto URI containing "4you" (any case) before the first '/'
- # that follows the scheme. '@' is used as the regex delimiter here.
- uri URI_4YOU m@^(?:https?://|mailto:)[^\/]*4you@i
- describe URI_4YOU Message has URI 4you
-
- # 0 nonspam hits, hundreds of spam hits. Serious problems there
- # Unanchored substring match on "terra.es/" (any case): the trailing '/' is
- # required, and nothing constrains what precedes "terra", so hosts that
- # merely end in "terra.es" match as well.
- uri TERRA_ES /terra\.es\//i
- describe TERRA_ES Contains URI to a document hosted at 'terra.es'
-
- # "www" hidden as "%77%77%77", "ww%77", etc.
- # note: *not* anchored to start of string, to catch use of redirectors
- # %77 is the escape for 'w'; .{0,2} allows up to two characters before it,
- # so the escape must fall within the first three characters after
- # "http://". Only lowercase "http://" is matched (no "https", no /i).
- uri HTTP_77 /http:\/\/.{0,2}\%77/
- describe HTTP_77 Contains an URL-encoded hostname (HTTP77)
-
- # affiliateid, aff_id, aff_sub_id etc.
- # "aff", then one or more word characters (\w+), then "id=". Because \w+
- # needs at least one character, a bare "affid=" does not match. Unanchored
- # and case-insensitive.
- uri URI_AFFILIATE /aff\w+id=/i
- describe URI_AFFILIATE Contains a URI with an affiliate ID code
-
- # really a URI rule
- # Declared with the "header" keyword but implemented as an eval test: the
- # decision is made by check_for_http_redirector(), which is not defined in
- # this file, so the exact matching logic cannot be read from here.
- header URI_REDIRECTOR eval:check_for_http_redirector()
- describe URI_REDIRECTOR Message has HTTP redirector URI
-